Image Processing
This section documents the image processing capabilities available in the codebase.
Table of Contents
- Image Captioning
- Color Scheme Extraction
- AWS Rekognition
- Advanced Use Cases
Image Captioning
File: models/image/image_caption.py
This module combines AWS Rekognition for identifying labels in images with OpenAI's GPT for generating natural language captions.
Initialization
from models.image.image_caption import ImageCaptioning
# Initialize with AWS and OpenAI credentials
image_captioner = ImageCaptioning(
aws_access_key_id='YOUR_AWS_ACCESS_KEY_ID',
aws_secret_access_key='YOUR_AWS_SECRET_ACCESS_KEY',
region_name='YOUR_AWS_REGION',
openai_api_key='YOUR_OPENAI_API_KEY'
)
Generate Caption
# Load image bytes
with open('path_to_image.jpg', 'rb') as image_file:
image_bytes = image_file.read()
# Generate caption
caption = image_captioner.caption_image(image_bytes)
print(caption)
How It Works
The captioning process follows these steps:
- The image is sent to AWS Rekognition to detect labels (objects, scenes, and concepts)
- The detected labels are formatted into a comma-separated list
- This list is sent to OpenAI with a prompt to generate a descriptive caption
- The generated text is returned as the image caption
Color Scheme Extraction
File: models/image/image_color_theme.py
This module extracts dominant colors from images using k-means clustering.
Initialization
from models.image.image_color_theme import ImageColorScheme
# Initialize with the desired number of colors to extract
color_scheme = ImageColorScheme(num_colors=5)
Extract Colors from File
# Extract dominant colors and get hex codes
colors = color_scheme.get_hex_colors('path_to_image.jpg')
print(colors) # ['#ff5733', '#33ff57', '#5733ff', '#ff33a6', '#33a6ff']
Extract Colors from Image Bytes
# Extract colors from image bytes
with open('path_to_image.jpg', 'rb') as image_file:
image_bytes = image_file.read()
colors = color_scheme.get_hex_colors_from_bytes(image_bytes)
print(colors)
Raw RGB Colors
# Get RGB values instead of hex codes
raw_colors = color_scheme.extract_colors('path_to_image.jpg')
print(raw_colors) # [[255, 87, 51], [51, 255, 87], ...]
How It Works
The color extraction process works as follows:
- The image is loaded and resized for efficient processing
- The image's pixel data is converted to RGB format
- K-means clustering is applied to identify the dominant color clusters
- The center of each cluster is extracted as a representative color
- Colors are converted to hex format for easy use in web and UI applications
AWS Rekognition
File: models/aws/rekognition.py
This module provides a client for Amazon Rekognition, AWS's computer vision service, offering a wide range of image and video analysis capabilities.
Initialization
from models.aws.rekognition import RekognitionClient
# Initialize with AWS credentials
rekognition = RekognitionClient(
aws_access_key_id='YOUR_AWS_ACCESS_KEY_ID',
aws_secret_access_key='YOUR_AWS_SECRET_ACCESS_KEY',
region_name='YOUR_AWS_REGION'
)
Detect Labels
# Load image bytes
with open('path_to_image.jpg', 'rb') as image_file:
image_bytes = image_file.read()
# Detect labels in the image
labels = rekognition.detect_labels(
image_bytes=image_bytes,
max_labels=10, # Maximum number of labels to return
min_confidence=80 # Minimum confidence percentage
)
print(labels)
Example response:
[
{
'Name': 'Car',
'Confidence': 99.15271759033203,
'Instances': [...],
'Parents': [{'Name': 'Vehicle'}, {'Name': 'Transportation'}]
},
{
'Name': 'Automobile',
'Confidence': 99.15271759033203,
'Instances': [],
'Parents': [{'Name': 'Vehicle'}, {'Name': 'Transportation'}]
},
# ...
]
Detect Faces
# Detect faces in the image
faces = rekognition.detect_faces(image_bytes)
print(faces)
Example response:
[
{
'BoundingBox': {
'Width': 0.6954022645950317,
'Height': 0.2544529736042023,
'Left': 0.1633375883102417,
'Top': 0.1475013792514801
},
'AgeRange': {'Low': 20, 'High': 30},
'Smile': {'Value': True, 'Confidence': 96.94185638427734},
'Eyeglasses': {'Value': False, 'Confidence': 99.69209289550781},
'Sunglasses': {'Value': False, 'Confidence': 99.9991226196289},
'Gender': {'Value': 'Female', 'Confidence': 99.99555969238281},
'Beard': {'Value': False, 'Confidence': 99.97463989257812},
'Mustache': {'Value': False, 'Confidence': 99.98968505859375},
'EyesOpen': {'Value': True, 'Confidence': 99.99991607666016},
'MouthOpen': {'Value': True, 'Confidence': 94.05339813232422},
'Emotions': [
{'Type': 'HAPPY', 'Confidence': 99.9469223022461},
{'Type': 'CALM', 'Confidence': 0.23001517355442047},
# ...
],
'Landmarks': [
{'Type': 'eyeLeft', 'X': 0.3295428156852722, 'Y': 0.2268327772617},
{'Type': 'eyeRight', 'X': 0.6795527935028076, 'Y': 0.2295929193496704},
# ...
],
'Pose': {
'Roll': -0.5577205419540405,
'Yaw': -0.9853221774101257,
'Pitch': 2.6768236160278
},
'Quality': {'Brightness': 43.768043518066406, 'Sharpness': 99.95819854736328},
'Confidence': 99.99998474121094
}
]
Detect Text
# Detect text in the image
text = rekognition.detect_text(image_bytes)
print(text)
Example response:
[
{
'DetectedText': 'HELLO',
'Type': 'LINE',
'Id': 0,
'Confidence': 99.35721588134766,
'Geometry': {...}
},
{
'DetectedText': 'WORLD',
'Type': 'LINE',
'Id': 1,
'Confidence': 99.6502914428711,
'Geometry': {...}
}
]
Compare Faces
# Load source and target images
with open('source_image.jpg', 'rb') as source_file:
source_image = source_file.read()
with open('target_image.jpg', 'rb') as target_file:
target_image = target_file.read()
# Compare faces between the two images
matches = rekognition.compare_faces(
source_image_bytes=source_image,
target_image_bytes=target_image,
similarity_threshold=90 # Only return matches with similarity >= 90%
)
print(matches)
Detect Moderation Labels
# Check for inappropriate content
moderation_labels = rekognition.detect_moderation_labels(
image_bytes=image_bytes,
min_confidence=80
)
print(moderation_labels)
Recognize Celebrities
# Identify celebrities in the image
celebrities = rekognition.recognize_celebrities(image_bytes)
print(celebrities)
Color Scheme Analysis
# Identify the main colors in the image
colors = rekognition.identify_color_scheme(image_bytes, num_colors=5)
print(colors)
Example response:
[
{'color': (240, 248, 255), 'count': 7890}, # RGB values and pixel count
{'color': (30, 144, 255), 'count': 4567},
{'color': (255, 255, 0), 'count': 2345},
{'color': (255, 0, 0), 'count': 1234},
{'color': (0, 0, 0), 'count': 567}
]
Detect Personal Protective Equipment
# Identify people and check if they're wearing PPE
ppe_detection = rekognition.detect_protective_equipment(image_bytes)
print(ppe_detection)
Advanced Use Cases
Multi-Step Processing
You can combine these services for more complex use cases:
# Analyze an image comprehensively
def analyze_image(image_path):
with open(image_path, 'rb') as image_file:
image_bytes = image_file.read()
# Initialize services
rekognition = RekognitionClient(...)
color_scheme = ImageColorScheme(num_colors=5)
# Get various analyses
labels = rekognition.detect_labels(image_bytes)
faces = rekognition.detect_faces(image_bytes)
text = rekognition.detect_text(image_bytes)
colors = color_scheme.get_hex_colors_from_bytes(image_bytes)
# Combine results
return {
"labels": labels,
"face_count": len(faces),
"emotions": [face.get('Emotions', []) for face in faces],
"text_content": [t['DetectedText'] for t in text],
"color_palette": colors
}
Image Content Moderation
def is_image_appropriate(image_bytes):
rekognition = RekognitionClient(...)
moderation_labels = rekognition.detect_moderation_labels(image_bytes)
# Check for inappropriate content
for label in moderation_labels:
if label['Confidence'] > 80 and label['Name'] in ['Explicit Nudity', 'Violence', 'Drugs']:
return False, label['Name']
return True, None